####Replication material for: Who are the cosmopolitans? 
#### Author: Ronja Sczepanski




# NOTE: the global workspace is intentionally not wiped here.
# `rm(list = ls())` deletes the caller's objects when this file is sourced,
# yet it neither detaches packages nor resets options, so it does not give a
# clean session. Every object used below is created by this script; for a
# clean replication run, start a fresh R session instead.


# Packages required by this script (the same ones attached with library()
# further down).
load.lib <- c(
  "quanteda", "ggplot2", "dplyr", "tidyr", "tidytext", "stringr", "patchwork"
)

# Keep only the packages that are not installed yet. installed.packages()
# returns a matrix whose row names are the package names, so compare against
# those rather than against every cell of the matrix.
install.lib <- setdiff(load.lib, rownames(installed.packages()))

# Install the missing packages together with their dependencies; nothing is
# done when everything is already available.
if (length(install.lib) > 0) {
  install.packages(install.lib, dependencies = TRUE)
}

###Libraries
library(quanteda)
library(stringr)
library(dplyr)
library(tidyr)
library(tidytext)
library(ggplot2)
library(patchwork)



# Read data files

### FOR REPLICATION: set `path_to_data` to the folder that holds the data.
### The value must end with "/" because file names are appended with paste0().
### The figures produced below are saved to this same folder.
path_to_data <- ".../"

# `encoding` only declares how the strings read from the file are to be
# marked; read.csv() recognises exactly "latin1" and "UTF-8" for it, and any
# other spelling (such as lower-case "utf-8") is silently ignored.
dat_at_word_all <- read.csv(
  paste0(path_to_data, "open_ended_data_at.csv"),
  encoding = "UTF-8"
)
dat_it_word_all <- read.csv(
  paste0(path_to_data, "open_ended_data_it.csv"),
  encoding = "UTF-8"
)



# Collapse a token-level table to one row per (word_num, token) pair holding
# the mean of `rating` (missing ratings ignored) and the number of rows, and
# keep only pairs that occur more than 7 times.
summarise_tokens <- function(dat) {
  per_token <- summarise(
    group_by(dat, word_num, token),
    mean_rat = mean(rating, na.rm = TRUE),
    freq = n()
  )
  filter(ungroup(per_token), freq > 7)
}

# Austrian data
summary_dat <- summarise_tokens(dat_at_word_all)

# Italian data
summary_dat_it <- summarise_tokens(dat_it_word_all)

# Tokens that are excluded from the top-50 tables below.
nas <- c("nan", "ka", "nix", "x", "know", "knowing", "nn", "k", "non", "so")

# Austria: item "Q25" only, excluded tokens dropped, then the 50 most
# frequent tokens (slice_max() keeps ties, so more rows are possible).
summary_dat_small <- slice_max(
  filter(summary_dat, word_num == "Q25", !(token %in% nas)),
  freq,
  n = 50
)

# Italy: same selection. The extra column `n` (a quarter of the summed
# frequencies of the selected rows) is computed before the top-50 cut; it is
# not referenced by the plotting code visible in this file.
summary_dat_it_small <- summary_dat_it %>%
  filter(word_num == "Q25", !(token %in% nas)) %>%
  mutate(n = sum(freq) / 4) %>%
  slice_max(freq, n = 50)


# Split the Austrian summary into one data frame per distinct `word_num`
# value, in order of first appearance. lapply() yields an empty list when
# there are no values, whereas looping over 1:length(x) would run with the
# indices 1 and 0 and fail on the zero index.
variable_list <- unique(summary_dat$word_num)
data_list <- lapply(
  variable_list,
  function(item) subset(summary_dat, word_num == item)
)


# Dot plot for Austria: one point per token, placed at its frequency and
# coloured by the mean rating. Tokens are ordered along the y axis by
# frequency through reorder().
dat_aut_sorted <- arrange(summary_dat_small, freq)

token_aes <- aes(
  x = freq,
  y = reorder(token, freq),
  label = token,
  color = mean_rat
)

plot_aut <- ggplot(dat_aut_sorted, token_aes) +
  geom_point() +
  # One colour scale range so both country panels share the same legend.
  scale_color_viridis_c(limits = c(-2, 3)) +
  labs(
    x = "Frequency",
    y = NULL,
    subtitle = "(a) Austria",
    color = "Positive-\nNegative"
  ) +
  # theme_minimal() must come first: a complete theme would otherwise
  # overwrite the element tweaks that follow.
  theme_minimal() +
  theme(
    plot.subtitle = element_text(hjust = .5, size = 12),
    axis.text.y.left = element_text(size = 10)
  )

plot_aut

# Write the Austrian panel as a 16 x 16 cm PNG into the data folder.
ggsave(
  filename = "asso_european_at.png",
  plot = plot_aut,
  device = "png",
  path = path_to_data,
  width = 16,
  height = 16,
  units = "cm",
  dpi = 500
)


# Italian panel: reuse the Austrian plot specification (mapping, scale,
# theme), swap in the Italian top-50 table as its data, and relabel it.
plot_ita <- plot_aut
plot_ita$data <- summary_dat_it_small
plot_ita <- plot_ita + labs(subtitle = "(b) Italy")

# Write the Italian panel as a 16 x 16 cm PNG into the data folder.
ggsave(
  filename = "asso_european_it.png",
  plot = plot_ita,
  device = "png",
  path = path_to_data,
  width = 16,
  height = 16,
  units = "cm",
  dpi = 300
)

### Figure 1 appendix

# Combine both country panels with patchwork. guide_area() reserves a third
# cell that receives the legends gathered by plot_layout(guides = "collect").
p <- plot_aut +
  plot_ita +
  guide_area() +
  plot_layout(guides = "collect")

# Write the combined figure as a 19 x 22 cm PNG into the data folder.
ggsave(
  filename = "asso_european.png",
  plot = p,
  device = "png",
  path = path_to_data,
  width = 19,
  height = 22,
  units = "cm",
  dpi = 600
)

p

